################################################################################
#' Sample script for using the tool snscrape to get status_ids, in order to 
#' rehydrate the tweets via rtweet (as that is a well-documented, standard
#' tool). 
#' 
#' Note that this extracts status_id from the URLs, rather than from the 
#' status_id field, as I found that field to be unreliable. 
#' 
#' Copyright (c) 2020-2021 Momin M. Malik
#' This project is licensed under the terms of the MIT license.
################################################################################
# install.packages("jsonlite")
library(jsonlite)
# Directory holding the snscrape output files (one JSON record per line).
# Set this to the real location before running the script.
snscrape_dir <- ""

#' Pull the tweet URL out of one line of snscrape JSON output.
#'
#' The `url` field is looked up by name, so the result does not depend on the
#' order of the fields in the record. The first field is used only as a
#' fallback for records that have no `url` entry.
#'
#' @param json_line A single string holding one JSON record.
#' @return A length-one character vector with the URL, or `NA_character_`
#'   when the record carries no usable URL.
url_from_json_line <- function(json_line) {
  record <- fromJSON(json_line)
  if (length(record) == 0L) {
    return(NA_character_)
  }
  url <- if ("url" %in% names(record)) record[["url"]] else record[[1L]]
  if (length(url) != 1L || !is.atomic(url)) {
    return(NA_character_)
  }
  as.character(url)
}

#' Read every tweet URL from one snscrape output file.
#'
#' @param path Path to a file with one JSON record per line.
#' @return A character vector with one entry per non-blank line (possibly
#'   `NA` for records without a usable URL).
urls_from_file <- function(path) {
  # Passing the path lets readLines() open and close the connection itself,
  # so nothing is left open if parsing fails further down.
  lines <- readLines(path)
  lines <- lines[nzchar(trimws(lines))] # blank lines are not valid JSON
  # vapply() always returns a character vector, including for empty files.
  vapply(lines, url_from_json_line, character(1), USE.NAMES = FALSE)
}

#' Extract the status ID (last path segment) from tweet URLs.
#'
#' @param urls Character vector of tweet URLs.
#' @return Character vector of the same length as `urls`. IDs stay character
#'   so that long numeric IDs are never coerced to double.
status_id_from_url <- function(urls) {
  no_query <- sub("[?#].*$", "", urls) # drop any query string / fragment
  no_slash <- sub("/+$", "", no_query) # drop trailing slashes
  sub("^.*/", "", no_slash)            # keep what follows the last "/"
}

# Read in tweets from snscrape
input_files <- list.files(snscrape_dir, full.names = TRUE)
input_files <- input_files[!dir.exists(input_files)] # skip sub-directories

urls_by_file <- lapply(input_files, function(path) {
  file_urls <- urls_from_file(path)
  print(path) # progress indicator, one line per processed file
  file_urls
})

# Extract status_ids from URLs (more reliable than the status_id field)
urls <- as.character(unlist(urls_by_file, use.names = FALSE))
urls <- urls[!is.na(urls)]
status_ids <- status_id_from_url(urls)

rm(urls_by_file, input_files)

if (length(status_ids) == 0L) {
  # Nothing was read (for instance snscrape_dir is unset or empty): say so
  # explicitly and leave any existing status_ids.csv untouched.
  warning(
    "No status_ids found under snscrape_dir = \"", snscrape_dir,
    "\"; status_ids.csv was not written.",
    call. = FALSE
  )
} else {
  write.csv(data.frame(status_id = status_ids),
            "status_ids.csv",
            row.names = FALSE)
}
